#
#
#                    NimCrypto
#        (c) Copyright 2016 Eugene Kabanov
#
#      See the file "LICENSE", included in this
#    distribution, for details about the copyright.
#

## This module implements Twofish crypto algorithm by Bruce Schneier.
##
## Code based on `Optimized C` created by Drew Csillag
## [https://www.schneier.com/code/twofish-cpy.zip].
##
## Tests made according to official test vectors
## [https://www.schneier.com/code/ecb_ival.txt].
import utils

const
  # Modulus handed to gfMult by rsMatrixMultiply when combining key bytes.
  RS_MOD = 0x14D
  # 0x01 repeated in every byte. initTwofishContext feeds 2*i*RHO and
  # 2*i*RHO + RHO to h when it derives the 40 subkey words.
  RHO = 0x01010101

  # 4x8 coefficient matrix. rsMatrixMultiply combines each row with 8 key
  # bytes using gfMult (modulus RS_MOD) to produce one output byte per row.
  RS = [
    [0x01'u8, 0xA4'u8, 0x55'u8, 0x87'u8, 0x5A'u8, 0x58'u8, 0xDB'u8, 0x9E'u8],
    [0xA4'u8, 0x56'u8, 0x82'u8, 0xF3'u8, 0x1E'u8, 0xC6'u8, 0x68'u8, 0xE5'u8],
    [0x02'u8, 0xA1'u8, 0xFC'u8, 0xC1'u8, 0x47'u8, 0xAE'u8, 0x3D'u8, 0x19'u8],
    [0xA4'u8, 0x55'u8, 0x87'u8, 0x5A'u8, 0x58'u8, 0xDB'u8, 0x9E'u8, 0x03'u8]
  ]

  # 256-entry byte substitution table, indexed by a byte value in h and
  # fullKey.
  # NOTE(review): presumably the fixed q0 permutation of the Twofish
  # specification - confirm against the reference tables.
  Q0 = [
    0xA9'u8, 0x67'u8, 0xB3'u8, 0xE8'u8, 0x04'u8, 0xFD'u8, 0xA3'u8, 0x76'u8,
    0x9A'u8, 0x92'u8, 0x80'u8, 0x78'u8, 0xE4'u8, 0xDD'u8, 0xD1'u8, 0x38'u8,
    0x0D'u8, 0xC6'u8, 0x35'u8, 0x98'u8, 0x18'u8, 0xF7'u8, 0xEC'u8, 0x6C'u8,
    0x43'u8, 0x75'u8, 0x37'u8, 0x26'u8, 0xFA'u8, 0x13'u8, 0x94'u8, 0x48'u8,
    0xF2'u8, 0xD0'u8, 0x8B'u8, 0x30'u8, 0x84'u8, 0x54'u8, 0xDF'u8, 0x23'u8,
    0x19'u8, 0x5B'u8, 0x3D'u8, 0x59'u8, 0xF3'u8, 0xAE'u8, 0xA2'u8, 0x82'u8,
    0x63'u8, 0x01'u8, 0x83'u8, 0x2E'u8, 0xD9'u8, 0x51'u8, 0x9B'u8, 0x7C'u8,
    0xA6'u8, 0xEB'u8, 0xA5'u8, 0xBE'u8, 0x16'u8, 0x0C'u8, 0xE3'u8, 0x61'u8,
    0xC0'u8, 0x8C'u8, 0x3A'u8, 0xF5'u8, 0x73'u8, 0x2C'u8, 0x25'u8, 0x0B'u8,
    0xBB'u8, 0x4E'u8, 0x89'u8, 0x6B'u8, 0x53'u8, 0x6A'u8, 0xB4'u8, 0xF1'u8,
    0xE1'u8, 0xE6'u8, 0xBD'u8, 0x45'u8, 0xE2'u8, 0xF4'u8, 0xB6'u8, 0x66'u8,
    0xCC'u8, 0x95'u8, 0x03'u8, 0x56'u8, 0xD4'u8, 0x1C'u8, 0x1E'u8, 0xD7'u8,
    0xFB'u8, 0xC3'u8, 0x8E'u8, 0xB5'u8, 0xE9'u8, 0xCF'u8, 0xBF'u8, 0xBA'u8,
    0xEA'u8, 0x77'u8, 0x39'u8, 0xAF'u8, 0x33'u8, 0xC9'u8, 0x62'u8, 0x71'u8,
    0x81'u8, 0x79'u8, 0x09'u8, 0xAD'u8, 0x24'u8, 0xCD'u8, 0xF9'u8, 0xD8'u8,
    0xE5'u8, 0xC5'u8, 0xB9'u8, 0x4D'u8, 0x44'u8, 0x08'u8, 0x86'u8, 0xE7'u8,
    0xA1'u8, 0x1D'u8, 0xAA'u8, 0xED'u8, 0x06'u8, 0x70'u8, 0xB2'u8, 0xD2'u8,
    0x41'u8, 0x7B'u8, 0xA0'u8, 0x11'u8, 0x31'u8, 0xC2'u8, 0x27'u8, 0x90'u8,
    0x20'u8, 0xF6'u8, 0x60'u8, 0xFF'u8, 0x96'u8, 0x5C'u8, 0xB1'u8, 0xAB'u8,
    0x9E'u8, 0x9C'u8, 0x52'u8, 0x1B'u8, 0x5F'u8, 0x93'u8, 0x0A'u8, 0xEF'u8,
    0x91'u8, 0x85'u8, 0x49'u8, 0xEE'u8, 0x2D'u8, 0x4F'u8, 0x8F'u8, 0x3B'u8,
    0x47'u8, 0x87'u8, 0x6D'u8, 0x46'u8, 0xD6'u8, 0x3E'u8, 0x69'u8, 0x64'u8,
    0x2A'u8, 0xCE'u8, 0xCB'u8, 0x2F'u8, 0xFC'u8, 0x97'u8, 0x05'u8, 0x7A'u8,
    0xAC'u8, 0x7F'u8, 0xD5'u8, 0x1A'u8, 0x4B'u8, 0x0E'u8, 0xA7'u8, 0x5A'u8,
    0x28'u8, 0x14'u8, 0x3F'u8, 0x29'u8, 0x88'u8, 0x3C'u8, 0x4C'u8, 0x02'u8,
    0xB8'u8, 0xDA'u8, 0xB0'u8, 0x17'u8, 0x55'u8, 0x1F'u8, 0x8A'u8, 0x7D'u8,
    0x57'u8, 0xC7'u8, 0x8D'u8, 0x74'u8, 0xB7'u8, 0xC4'u8, 0x9F'u8, 0x72'u8,
    0x7E'u8, 0x15'u8, 0x22'u8, 0x12'u8, 0x58'u8, 0x07'u8, 0x99'u8, 0x34'u8,
    0x6E'u8, 0x50'u8, 0xDE'u8, 0x68'u8, 0x65'u8, 0xBC'u8, 0xDB'u8, 0xF8'u8,
    0xC8'u8, 0xA8'u8, 0x2B'u8, 0x40'u8, 0xDC'u8, 0xFE'u8, 0x32'u8, 0xA4'u8,
    0xCA'u8, 0x10'u8, 0x21'u8, 0xF0'u8, 0xD3'u8, 0x5D'u8, 0x0F'u8, 0x00'u8,
    0x6F'u8, 0x9D'u8, 0x36'u8, 0x42'u8, 0x4A'u8, 0x5E'u8, 0xC1'u8, 0xE0'u8
  ]

  # Second 256-entry byte substitution table, indexed by a byte value in h
  # and fullKey.
  # NOTE(review): presumably the fixed q1 permutation of the Twofish
  # specification - confirm against the reference tables.
  Q1 = [
    0x75'u8, 0xF3'u8, 0xC6'u8, 0xF4'u8, 0xDB'u8, 0x7B'u8, 0xFB'u8, 0xC8'u8,
    0x4A'u8, 0xD3'u8, 0xE6'u8, 0x6B'u8, 0x45'u8, 0x7D'u8, 0xE8'u8, 0x4B'u8,
    0xD6'u8, 0x32'u8, 0xD8'u8, 0xFD'u8, 0x37'u8, 0x71'u8, 0xF1'u8, 0xE1'u8,
    0x30'u8, 0x0F'u8, 0xF8'u8, 0x1B'u8, 0x87'u8, 0xFA'u8, 0x06'u8, 0x3F'u8,
    0x5E'u8, 0xBA'u8, 0xAE'u8, 0x5B'u8, 0x8A'u8, 0x00'u8, 0xBC'u8, 0x9D'u8,
    0x6D'u8, 0xC1'u8, 0xB1'u8, 0x0E'u8, 0x80'u8, 0x5D'u8, 0xD2'u8, 0xD5'u8,
    0xA0'u8, 0x84'u8, 0x07'u8, 0x14'u8, 0xB5'u8, 0x90'u8, 0x2C'u8, 0xA3'u8,
    0xB2'u8, 0x73'u8, 0x4C'u8, 0x54'u8, 0x92'u8, 0x74'u8, 0x36'u8, 0x51'u8,
    0x38'u8, 0xB0'u8, 0xBD'u8, 0x5A'u8, 0xFC'u8, 0x60'u8, 0x62'u8, 0x96'u8,
    0x6C'u8, 0x42'u8, 0xF7'u8, 0x10'u8, 0x7C'u8, 0x28'u8, 0x27'u8, 0x8C'u8,
    0x13'u8, 0x95'u8, 0x9C'u8, 0xC7'u8, 0x24'u8, 0x46'u8, 0x3B'u8, 0x70'u8,
    0xCA'u8, 0xE3'u8, 0x85'u8, 0xCB'u8, 0x11'u8, 0xD0'u8, 0x93'u8, 0xB8'u8,
    0xA6'u8, 0x83'u8, 0x20'u8, 0xFF'u8, 0x9F'u8, 0x77'u8, 0xC3'u8, 0xCC'u8,
    0x03'u8, 0x6F'u8, 0x08'u8, 0xBF'u8, 0x40'u8, 0xE7'u8, 0x2B'u8, 0xE2'u8,
    0x79'u8, 0x0C'u8, 0xAA'u8, 0x82'u8, 0x41'u8, 0x3A'u8, 0xEA'u8, 0xB9'u8,
    0xE4'u8, 0x9A'u8, 0xA4'u8, 0x97'u8, 0x7E'u8, 0xDA'u8, 0x7A'u8, 0x17'u8,
    0x66'u8, 0x94'u8, 0xA1'u8, 0x1D'u8, 0x3D'u8, 0xF0'u8, 0xDE'u8, 0xB3'u8,
    0x0B'u8, 0x72'u8, 0xA7'u8, 0x1C'u8, 0xEF'u8, 0xD1'u8, 0x53'u8, 0x3E'u8,
    0x8F'u8, 0x33'u8, 0x26'u8, 0x5F'u8, 0xEC'u8, 0x76'u8, 0x2A'u8, 0x49'u8,
    0x81'u8, 0x88'u8, 0xEE'u8, 0x21'u8, 0xC4'u8, 0x1A'u8, 0xEB'u8, 0xD9'u8,
    0xC5'u8, 0x39'u8, 0x99'u8, 0xCD'u8, 0xAD'u8, 0x31'u8, 0x8B'u8, 0x01'u8,
    0x18'u8, 0x23'u8, 0xDD'u8, 0x1F'u8, 0x4E'u8, 0x2D'u8, 0xF9'u8, 0x48'u8,
    0x4F'u8, 0xF2'u8, 0x65'u8, 0x8E'u8, 0x78'u8, 0x5C'u8, 0x58'u8, 0x19'u8,
    0x8D'u8, 0xE5'u8, 0x98'u8, 0x57'u8, 0x67'u8, 0x7F'u8, 0x05'u8, 0x64'u8,
    0xAF'u8, 0x63'u8, 0xB6'u8, 0xFE'u8, 0xF5'u8, 0xB7'u8, 0x3C'u8, 0xA5'u8,
    0xCE'u8, 0xE9'u8, 0x68'u8, 0x44'u8, 0xE0'u8, 0x4D'u8, 0x43'u8, 0x69'u8,
    0x29'u8, 0x2E'u8, 0xAC'u8, 0x15'u8, 0x59'u8, 0xA8'u8, 0x0A'u8, 0x9E'u8,
    0x6E'u8, 0x47'u8, 0xDF'u8, 0x34'u8, 0x35'u8, 0x6A'u8, 0xCF'u8, 0xDC'u8,
    0x22'u8, 0xC9'u8, 0xC0'u8, 0x9B'u8, 0x89'u8, 0xD4'u8, 0xED'u8, 0xAB'u8,
    0x12'u8, 0xA2'u8, 0x0D'u8, 0x52'u8, 0xBB'u8, 0x02'u8, 0x2F'u8, 0xA9'u8,
    0xD7'u8, 0x61'u8, 0x1E'u8, 0xB4'u8, 0x50'u8, 0x04'u8, 0xF6'u8, 0xC2'u8,
    0x16'u8, 0x25'u8, 0x86'u8, 0x56'u8, 0x55'u8, 0x09'u8, 0xBE'u8, 0x91'u8
  ]

  # 256-entry byte table looked up by h and fullKey wherever a byte is
  # combined with the 0x5B coefficient (entries 1 and 2 are 0x5B and 0xB6).
  # NOTE(review): presumably x * 0x5B in GF(2^8) with the Twofish MDS
  # polynomial - confirm against the specification.
  mult5B = [
    0x00'u8, 0x5B'u8, 0xB6'u8, 0xED'u8, 0x05'u8, 0x5E'u8, 0xB3'u8, 0xE8'u8,
    0x0A'u8, 0x51'u8, 0xBC'u8, 0xE7'u8, 0x0F'u8, 0x54'u8, 0xB9'u8, 0xE2'u8,
    0x14'u8, 0x4F'u8, 0xA2'u8, 0xF9'u8, 0x11'u8, 0x4A'u8, 0xA7'u8, 0xFC'u8,
    0x1E'u8, 0x45'u8, 0xA8'u8, 0xF3'u8, 0x1B'u8, 0x40'u8, 0xAD'u8, 0xF6'u8,
    0x28'u8, 0x73'u8, 0x9E'u8, 0xC5'u8, 0x2D'u8, 0x76'u8, 0x9B'u8, 0xC0'u8,
    0x22'u8, 0x79'u8, 0x94'u8, 0xCF'u8, 0x27'u8, 0x7C'u8, 0x91'u8, 0xCA'u8,
    0x3C'u8, 0x67'u8, 0x8A'u8, 0xD1'u8, 0x39'u8, 0x62'u8, 0x8F'u8, 0xD4'u8,
    0x36'u8, 0x6D'u8, 0x80'u8, 0xDB'u8, 0x33'u8, 0x68'u8, 0x85'u8, 0xDE'u8,
    0x50'u8, 0x0B'u8, 0xE6'u8, 0xBD'u8, 0x55'u8, 0x0E'u8, 0xE3'u8, 0xB8'u8,
    0x5A'u8, 0x01'u8, 0xEC'u8, 0xB7'u8, 0x5F'u8, 0x04'u8, 0xE9'u8, 0xB2'u8,
    0x44'u8, 0x1F'u8, 0xF2'u8, 0xA9'u8, 0x41'u8, 0x1A'u8, 0xF7'u8, 0xAC'u8,
    0x4E'u8, 0x15'u8, 0xF8'u8, 0xA3'u8, 0x4B'u8, 0x10'u8, 0xFD'u8, 0xA6'u8,
    0x78'u8, 0x23'u8, 0xCE'u8, 0x95'u8, 0x7D'u8, 0x26'u8, 0xCB'u8, 0x90'u8,
    0x72'u8, 0x29'u8, 0xC4'u8, 0x9F'u8, 0x77'u8, 0x2C'u8, 0xC1'u8, 0x9A'u8,
    0x6C'u8, 0x37'u8, 0xDA'u8, 0x81'u8, 0x69'u8, 0x32'u8, 0xDF'u8, 0x84'u8,
    0x66'u8, 0x3D'u8, 0xD0'u8, 0x8B'u8, 0x63'u8, 0x38'u8, 0xD5'u8, 0x8E'u8,
    0xA0'u8, 0xFB'u8, 0x16'u8, 0x4D'u8, 0xA5'u8, 0xFE'u8, 0x13'u8, 0x48'u8,
    0xAA'u8, 0xF1'u8, 0x1C'u8, 0x47'u8, 0xAF'u8, 0xF4'u8, 0x19'u8, 0x42'u8,
    0xB4'u8, 0xEF'u8, 0x02'u8, 0x59'u8, 0xB1'u8, 0xEA'u8, 0x07'u8, 0x5C'u8,
    0xBE'u8, 0xE5'u8, 0x08'u8, 0x53'u8, 0xBB'u8, 0xE0'u8, 0x0D'u8, 0x56'u8,
    0x88'u8, 0xD3'u8, 0x3E'u8, 0x65'u8, 0x8D'u8, 0xD6'u8, 0x3B'u8, 0x60'u8,
    0x82'u8, 0xD9'u8, 0x34'u8, 0x6F'u8, 0x87'u8, 0xDC'u8, 0x31'u8, 0x6A'u8,
    0x9C'u8, 0xC7'u8, 0x2A'u8, 0x71'u8, 0x99'u8, 0xC2'u8, 0x2F'u8, 0x74'u8,
    0x96'u8, 0xCD'u8, 0x20'u8, 0x7B'u8, 0x93'u8, 0xC8'u8, 0x25'u8, 0x7E'u8,
    0xF0'u8, 0xAB'u8, 0x46'u8, 0x1D'u8, 0xF5'u8, 0xAE'u8, 0x43'u8, 0x18'u8,
    0xFA'u8, 0xA1'u8, 0x4C'u8, 0x17'u8, 0xFF'u8, 0xA4'u8, 0x49'u8, 0x12'u8,
    0xE4'u8, 0xBF'u8, 0x52'u8, 0x09'u8, 0xE1'u8, 0xBA'u8, 0x57'u8, 0x0C'u8,
    0xEE'u8, 0xB5'u8, 0x58'u8, 0x03'u8, 0xEB'u8, 0xB0'u8, 0x5D'u8, 0x06'u8,
    0xD8'u8, 0x83'u8, 0x6E'u8, 0x35'u8, 0xDD'u8, 0x86'u8, 0x6B'u8, 0x30'u8,
    0xD2'u8, 0x89'u8, 0x64'u8, 0x3F'u8, 0xD7'u8, 0x8C'u8, 0x61'u8, 0x3A'u8,
    0xCC'u8, 0x97'u8, 0x7A'u8, 0x21'u8, 0xC9'u8, 0x92'u8, 0x7F'u8, 0x24'u8,
    0xC6'u8, 0x9D'u8, 0x70'u8, 0x2B'u8, 0xC3'u8, 0x98'u8, 0x75'u8, 0x2E'u8
  ]

  # 256-entry byte table looked up by h and fullKey wherever a byte is
  # combined with the 0xEF coefficient (entries 1 and 2 are 0xEF and 0xB7).
  # NOTE(review): presumably x * 0xEF in GF(2^8) with the Twofish MDS
  # polynomial - confirm against the specification.
  multEF = [
    0x00'u8, 0xEF'u8, 0xB7'u8, 0x58'u8, 0x07'u8, 0xE8'u8, 0xB0'u8, 0x5F'u8,
    0x0E'u8, 0xE1'u8, 0xB9'u8, 0x56'u8, 0x09'u8, 0xE6'u8, 0xBE'u8, 0x51'u8,
    0x1C'u8, 0xF3'u8, 0xAB'u8, 0x44'u8, 0x1B'u8, 0xF4'u8, 0xAC'u8, 0x43'u8,
    0x12'u8, 0xFD'u8, 0xA5'u8, 0x4A'u8, 0x15'u8, 0xFA'u8, 0xA2'u8, 0x4D'u8,
    0x38'u8, 0xD7'u8, 0x8F'u8, 0x60'u8, 0x3F'u8, 0xD0'u8, 0x88'u8, 0x67'u8,
    0x36'u8, 0xD9'u8, 0x81'u8, 0x6E'u8, 0x31'u8, 0xDE'u8, 0x86'u8, 0x69'u8,
    0x24'u8, 0xCB'u8, 0x93'u8, 0x7C'u8, 0x23'u8, 0xCC'u8, 0x94'u8, 0x7B'u8,
    0x2A'u8, 0xC5'u8, 0x9D'u8, 0x72'u8, 0x2D'u8, 0xC2'u8, 0x9A'u8, 0x75'u8,
    0x70'u8, 0x9F'u8, 0xC7'u8, 0x28'u8, 0x77'u8, 0x98'u8, 0xC0'u8, 0x2F'u8,
    0x7E'u8, 0x91'u8, 0xC9'u8, 0x26'u8, 0x79'u8, 0x96'u8, 0xCE'u8, 0x21'u8,
    0x6C'u8, 0x83'u8, 0xDB'u8, 0x34'u8, 0x6B'u8, 0x84'u8, 0xDC'u8, 0x33'u8,
    0x62'u8, 0x8D'u8, 0xD5'u8, 0x3A'u8, 0x65'u8, 0x8A'u8, 0xD2'u8, 0x3D'u8,
    0x48'u8, 0xA7'u8, 0xFF'u8, 0x10'u8, 0x4F'u8, 0xA0'u8, 0xF8'u8, 0x17'u8,
    0x46'u8, 0xA9'u8, 0xF1'u8, 0x1E'u8, 0x41'u8, 0xAE'u8, 0xF6'u8, 0x19'u8,
    0x54'u8, 0xBB'u8, 0xE3'u8, 0x0C'u8, 0x53'u8, 0xBC'u8, 0xE4'u8, 0x0B'u8,
    0x5A'u8, 0xB5'u8, 0xED'u8, 0x02'u8, 0x5D'u8, 0xB2'u8, 0xEA'u8, 0x05'u8,
    0xE0'u8, 0x0F'u8, 0x57'u8, 0xB8'u8, 0xE7'u8, 0x08'u8, 0x50'u8, 0xBF'u8,
    0xEE'u8, 0x01'u8, 0x59'u8, 0xB6'u8, 0xE9'u8, 0x06'u8, 0x5E'u8, 0xB1'u8,
    0xFC'u8, 0x13'u8, 0x4B'u8, 0xA4'u8, 0xFB'u8, 0x14'u8, 0x4C'u8, 0xA3'u8,
    0xF2'u8, 0x1D'u8, 0x45'u8, 0xAA'u8, 0xF5'u8, 0x1A'u8, 0x42'u8, 0xAD'u8,
    0xD8'u8, 0x37'u8, 0x6F'u8, 0x80'u8, 0xDF'u8, 0x30'u8, 0x68'u8, 0x87'u8,
    0xD6'u8, 0x39'u8, 0x61'u8, 0x8E'u8, 0xD1'u8, 0x3E'u8, 0x66'u8, 0x89'u8,
    0xC4'u8, 0x2B'u8, 0x73'u8, 0x9C'u8, 0xC3'u8, 0x2C'u8, 0x74'u8, 0x9B'u8,
    0xCA'u8, 0x25'u8, 0x7D'u8, 0x92'u8, 0xCD'u8, 0x22'u8, 0x7A'u8, 0x95'u8,
    0x90'u8, 0x7F'u8, 0x27'u8, 0xC8'u8, 0x97'u8, 0x78'u8, 0x20'u8, 0xCF'u8,
    0x9E'u8, 0x71'u8, 0x29'u8, 0xC6'u8, 0x99'u8, 0x76'u8, 0x2E'u8, 0xC1'u8,
    0x8C'u8, 0x63'u8, 0x3B'u8, 0xD4'u8, 0x8B'u8, 0x64'u8, 0x3C'u8, 0xD3'u8,
    0x82'u8, 0x6D'u8, 0x35'u8, 0xDA'u8, 0x85'u8, 0x6A'u8, 0x32'u8, 0xDD'u8,
    0xA8'u8, 0x47'u8, 0x1F'u8, 0xF0'u8, 0xAF'u8, 0x40'u8, 0x18'u8, 0xF7'u8,
    0xA6'u8, 0x49'u8, 0x11'u8, 0xFE'u8, 0xA1'u8, 0x4E'u8, 0x16'u8, 0xF9'u8,
    0xB4'u8, 0x5B'u8, 0x03'u8, 0xEC'u8, 0xB3'u8, 0x5C'u8, 0x04'u8, 0xEB'u8,
    0xBA'u8, 0x55'u8, 0x0D'u8, 0xE2'u8, 0xBD'u8, 0x52'u8, 0x0A'u8, 0xE5'u8
  ]

type
  # Expanded key material for one key. `bits` is the key size in bits; the
  # aliases below instantiate it with 128, 192 and 256.
  TwofishContext[bits: static[uint]] = object
    # Four 256-entry word tables, written by fullKey and read by fkh.
    S: array[4, array[256, uint32]]
    # Subkey words written by initTwofishContext: K[0..7] are XORed with the
    # block words before/after the rounds, K[8..39] are read by the rounds.
    K: array[40, uint32]

  # Exported context types, one per supported key size.
  twofish128* = TwofishContext[128]
  twofish192* = TwofishContext[192]
  twofish256* = TwofishContext[256]
  # Type class matching any of the three context types above.
  twofish* = twofish128 | twofish192 | twofish256

template bn(x, n: uint32): byte =
  ## Byte number `n` of `x`; `n = 0` selects the least significant byte.
  byte((x shr (8'u32 * n)) and 0xFF'u32)
template b0(x: uint32): byte =
  ## Bits 0..7 (the least significant byte) of `x`.
  byte(x and 0xFF'u32)
template b1(x: uint32): byte =
  ## Bits 8..15 of `x`.
  byte((x shr 8) and 0xFF'u32)
template b2(x: uint32): byte =
  ## Bits 16..23 of `x`.
  byte((x shr 16) and 0xFF'u32)
template b3(x: uint32): byte =
  ## Bits 24..31 (the most significant byte) of `x`.
  byte((x shr 24) and 0xFF'u32)

template BYTEARRAY_TO_U32(arr): uint32 =
  ## Packs `arr[0..3]` into one word with `arr[0]` in the top byte and
  ## `arr[3]` in the bottom byte. Elements are combined with xor after
  ## shifting, so element bits above the low 8 are not masked off.
  (((((uint32(arr[0]) shl 8) xor uint32(arr[1])) shl 8) xor
     uint32(arr[2])) shl 8) xor uint32(arr[3])
template BYTES_TO_U32(r0, r1, r2, r3): uint32 =
  ## Packs four values into one word: `r0` lands in the top byte, `r3` in
  ## the bottom byte. Values are combined with xor after shifting.
  (((((uint32(r0) shl 8) xor uint32(r1)) shl 8) xor
     uint32(r2)) shl 8) xor uint32(r3)

proc polyMult(a, b: uint32): uint32 =
  ## Carry-less product of `a` and `b`: xors together `b shl i` for every
  ## set bit `i` of `a`. Bits shifted past bit 31 are dropped.
  result = 0'u32
  for bit in 0 ..< 32:
    if ((a shr bit) and 1'u32) != 0'u32:
      result = result xor (b shl bit)

proc gfMod(t, modulus: uint32): uint32 =
  ## Reduces `t` by `modulus` using xor "subtraction". Starting from
  ## `modulus shl 7` and moving one bit right per step (8 steps), the
  ## shifted modulus is xored in whenever that makes the value smaller.
  let aligned = modulus shl 7
  result = t
  for step in 0 .. 7:
    let candidate = result xor (aligned shr step)
    if candidate < result:
      result = candidate

template gfMult(a, b, modulus: uint32): uint32 =
  ## Carry-less product of `a` and `b` (polyMult) reduced by `modulus`
  ## (gfMod).
  polyMult(a, b).gfMod(modulus)

proc rsMatrixMultiply(sd: array[8, byte]): uint32 =
  ## Multiplies the 4x8 `RS` matrix with the 8-byte column `sd`, using
  ## gfMult with modulus `RS_MOD` for the products and xor for the sums.
  ## The result of row 0 ends up in the lowest byte of the returned word
  ## and the result of row 3 in the highest byte.
  var packed: array[4, uint32]
  for rowIdx, row in RS:
    var dot = 0'u32
    for colIdx, coeff in row:
      dot = dot xor gfMult(coeff, sd[colIdx], RS_MOD)
    # BYTEARRAY_TO_U32 puts element 0 in the top byte, hence the reversal.
    packed[3 - rowIdx] = dot
  result = BYTEARRAY_TO_U32(packed)

proc h(ax: uint32, al: array[4, uint32], k: uint32): uint32 =
  ## Key-dependent word transform used while deriving the subkeys.
  ##
  ## The four bytes of `ax` are pushed through `Q0`/`Q1` lookups, xoring in
  ## the matching byte of a key word after each keyed layer. `k` selects
  ## how many words of `al` take part: 4 uses `al[3]` down to `al[0]`,
  ## 3 starts at `al[2]`, 2 uses only `al[1]` and `al[0]`. For any other
  ## `k` no substitution is applied. The four bytes are then mixed through
  ## the `multEF`/`mult5B` tables and packed into the result word.
  var
    lane0 = b0(ax)
    lane1 = b1(ax)
    lane2 = b2(ax)
    lane3 = b3(ax)

  # Each stage also runs for every larger supported key size.
  let
    useWord3 = k == 4
    useWord2 = useWord3 or k == 3
    useWord01 = useWord2 or k == 2

  if useWord3:
    let kw = al[3]
    lane0 = Q1[lane0] xor b0(kw)
    lane1 = Q0[lane1] xor b1(kw)
    lane2 = Q0[lane2] xor b2(kw)
    lane3 = Q1[lane3] xor b3(kw)

  if useWord2:
    let kw = al[2]
    lane0 = Q1[lane0] xor b0(kw)
    lane1 = Q1[lane1] xor b1(kw)
    lane2 = Q0[lane2] xor b2(kw)
    lane3 = Q0[lane3] xor b3(kw)

  if useWord01:
    # Layer keyed with al[1].
    lane0 = Q0[lane0] xor b0(al[1])
    lane1 = Q1[lane1] xor b1(al[1])
    lane2 = Q0[lane2] xor b2(al[1])
    lane3 = Q1[lane3] xor b3(al[1])
    # Layer keyed with al[0].
    lane0 = Q0[lane0] xor b0(al[0])
    lane1 = Q0[lane1] xor b1(al[0])
    lane2 = Q1[lane2] xor b2(al[0])
    lane3 = Q1[lane3] xor b3(al[0])
    # Final unkeyed substitution.
    lane0 = Q1[lane0]
    lane1 = Q0[lane1]
    lane2 = Q1[lane2]
    lane3 = Q0[lane3]

  # Mix the four bytes; z0 becomes the top byte of the result.
  let
    z0 = multEF[lane0] xor lane1 xor multEF[lane2] xor mult5B[lane3]
    z1 = multEF[lane0] xor mult5B[lane1] xor lane2 xor multEF[lane3]
    z2 = mult5B[lane0] xor multEF[lane1] xor multEF[lane2] xor lane3
    z3 = lane0 xor multEF[lane1] xor mult5B[lane2] xor mult5B[lane3]

  result = BYTES_TO_U32(z0, z1, z2, z3)

proc fullKey(al: array[4, uint32], k: int32,
             QF: var array[4, array[256, uint32]]) =
  ## Fills the four 256-entry word tables in `QF`.
  ##
  ## For every byte value `i`, the same keyed `Q0`/`Q1` substitution chain
  ## that `h` uses is applied per byte lane (with `k` selecting how many
  ## words of `al` take part), and the lane's contribution to the mixed
  ## output word is stored in `QF[lane][i]`. For `k` outside 2..4 no
  ## substitution is applied.
  let
    useWord3 = k == 4
    useWord2 = useWord3 or k == 3
    useWord01 = useWord2 or k == 2

  for i in 0 ..< 256:
    let seed = byte(i)
    var
      y0 = seed
      y1 = seed
      y2 = seed
      y3 = seed

    if useWord3:
      let kw = al[3]
      y0 = Q1[y0] xor b0(kw)
      y1 = Q0[y1] xor b1(kw)
      y2 = Q0[y2] xor b2(kw)
      y3 = Q1[y3] xor b3(kw)

    if useWord2:
      let kw = al[2]
      y0 = Q1[y0] xor b0(kw)
      y1 = Q1[y1] xor b1(kw)
      y2 = Q0[y2] xor b2(kw)
      y3 = Q0[y3] xor b3(kw)

    if useWord01:
      # Layer keyed with al[1].
      y0 = Q0[y0] xor b0(al[1])
      y1 = Q1[y1] xor b1(al[1])
      y2 = Q0[y2] xor b2(al[1])
      y3 = Q1[y3] xor b3(al[1])
      # Layer keyed with al[0].
      y0 = Q0[y0] xor b0(al[0])
      y1 = Q0[y1] xor b1(al[0])
      y2 = Q1[y2] xor b2(al[0])
      y3 = Q1[y3] xor b3(al[0])
      # Final unkeyed substitution.
      y0 = Q1[y0]
      y1 = Q0[y1]
      y2 = Q1[y2]
      y3 = Q0[y3]

    # The packed bytes occupy disjoint bit ranges, so the xor-based
    # BYTES_TO_U32 yields the same word as or-ing the shifted bytes.
    QF[0][i] = BYTES_TO_U32(multEF[y0], multEF[y0], mult5B[y0], y0)
    QF[1][i] = BYTES_TO_U32(y1, mult5B[y1], multEF[y1], multEF[y1])
    QF[2][i] = BYTES_TO_U32(multEF[y2], y2, multEF[y2], mult5B[y2])
    QF[3][i] = BYTES_TO_U32(mult5B[y3], multEF[y3], y3, mult5B[y3])

template fkh(S, X): uint32 =
  ## Fully keyed h (aka g) function: xors together one entry from each of
  ## the four tables in `S`, where table `n` is indexed by byte `n` of `X`
  ## (byte 0 is the least significant byte).
  ##
  ## `X` is bound to a local first so the argument expression is evaluated
  ## once instead of once per table lookup.
  block:
    let word: uint32 = X
    S[0][b0(word)] xor S[1][b1(word)] xor S[2][b2(word)] xor S[3][b3(word)]

template ENC_ROUND(CTX, R0, R1, R2, R3, round) =
  ## One encryption round. `R0` and `R1` are read, `R2` and `R3` are
  ## updated in place using subkeys `K[2 * round + 8]` and
  ## `K[2 * round + 9]` of `CTX`.
  ##
  ## Assigns to `T0` and `T1`, which are not parameters: both must be
  ## uint32 variables visible where the template is expanded.
  T0 = fkh(CTX.S, R0)
  T1 = fkh(CTX.S, ROL(R1, 8))
  # uint32 arithmetic wraps, so the order of the summands does not matter.
  R2 = ROR(R2 xor (T0 + T1 + CTX.K[8 + 2 * round]), 1)
  R3 = ROL(R3, 1) xor (T0 + (T1 shl 1) + CTX.K[9 + 2 * round])

template DEC_ROUND(CTX, R0, R1, R2, R3, round) =
  ## One decryption round. `R0` and `R1` are read, `R2` and `R3` are
  ## updated in place using subkeys `K[2 * round + 8]` and
  ## `K[2 * round + 9]` of `CTX`; the rotations are applied in the
  ## opposite order to ENC_ROUND.
  ##
  ## Assigns to `T0` and `T1`, which are not parameters: both must be
  ## uint32 variables visible where the template is expanded.
  T0 = fkh(CTX.S, R0)
  T1 = fkh(CTX.S, ROL(R1, 8))
  # uint32 arithmetic wraps, so the order of the summands does not matter.
  R2 = ROL(R2, 1) xor (T1 + T0 + CTX.K[8 + 2 * round])
  R3 = ROR(R3 xor ((T1 shl 1) + T0 + CTX.K[9 + 2 * round]), 1)

proc twofishEncrypt(ctx: var TwofishContext, inp: openArray[byte],
                    oup: var openArray[byte]) {.inline.} =
  ## Encrypts one block: reads four words from `inp` at offsets 0, 4, 8
  ## and 12 via leLoad32, runs 16 rounds, and writes four words to `oup`
  ## at the same offsets via leStore32.
  ##
  ## `K[0..3]` are xored into the input words, `K[4..7]` into the output
  ## words.

  # Scratch words assigned by ENC_ROUND, which refers to them by name.
  var T0, T1: uint32

  var
    r3 = ctx.K[3] xor leLoad32(inp, 12)
    r2 = ctx.K[2] xor leLoad32(inp, 8)
    r1 = ctx.K[1] xor leLoad32(inp, 4)
    r0 = ctx.K[0] xor leLoad32(inp, 0)

  # Rounds come in pairs: the second round of each pair swaps the roles
  # of the two word halves.
  for pair in 0 ..< 8:
    ENC_ROUND(ctx, r0, r1, r2, r3, 2 * pair)
    ENC_ROUND(ctx, r2, r3, r0, r1, 2 * pair + 1)

  # The halves are stored swapped relative to how they were loaded.
  leStore32(oup, 12, r1 xor ctx.K[7])
  leStore32(oup, 8, r0 xor ctx.K[6])
  leStore32(oup, 4, r3 xor ctx.K[5])
  leStore32(oup, 0, r2 xor ctx.K[4])

proc twofishDecrypt(ctx: var TwofishContext, inp: openArray[byte],
                    oup: var openArray[byte]) {.inline.} =
  ## Decrypts one block: reads four words from `inp` at offsets 0, 4, 8
  ## and 12 via leLoad32, runs the 16 rounds in reverse order (15 down to
  ## 0), and writes four words to `oup` at the same offsets via leStore32.
  ##
  ## `K[4..7]` are xored into the input words, `K[0..3]` into the output
  ## words.

  # Scratch words assigned by DEC_ROUND, which refers to them by name.
  var T0, T1: uint32

  var
    r3 = ctx.K[7] xor leLoad32(inp, 12)
    r2 = ctx.K[6] xor leLoad32(inp, 8)
    r1 = ctx.K[5] xor leLoad32(inp, 4)
    r0 = ctx.K[4] xor leLoad32(inp, 0)

  # Rounds come in pairs: odd rounds use (r0, r1, r2, r3), even rounds
  # use the swapped halves.
  for pair in countdown(7, 0):
    DEC_ROUND(ctx, r0, r1, r2, r3, 2 * pair + 1)
    DEC_ROUND(ctx, r2, r3, r0, r1, 2 * pair)

  # The halves are stored swapped relative to how they were loaded.
  leStore32(oup, 12, r1 xor ctx.K[3])
  leStore32(oup, 8, r0 xor ctx.K[2])
  leStore32(oup, 4, r3 xor ctx.K[1])
  leStore32(oup, 0, r2 xor ctx.K[0])

proc initTwofishContext(ctx: var TwofishContext, N: int,
                        key: openArray[byte]) =
  ## Expands `key` into `ctx`: fills the 40 subkey words `ctx.K` and the
  ## four lookup tables `ctx.S`.
  ##
  ## `N` is the key size in bits; `(N + 63) div 64` 64-bit groups
  ## (`8 * k` bytes) are read from `key`.
  var
    evenWords, oddWords, sboxKey: array[4, uint32]
    rsInput: array[8, byte]
  let k = (N + 63) div 64

  for i in 0 ..< k:
    # Each 8-byte group of the key yields one even and one odd word.
    evenWords[i] = leLoad32(key, 8 * i)
    oddWords[i] = leLoad32(key, 8 * i + 4)
    # The same 8 bytes, even word first, feed rsMatrixMultiply.
    for j in 0 ..< 4:
      rsInput[j] = bn(evenWords[i], uint32(j))
      rsInput[j + 4] = bn(oddWords[i], uint32(j))
    # Table-key words are stored in reverse group order.
    sboxKey[k - 1 - i] = rsMatrixMultiply(rsInput)

  let kw = uint32(k)
  for i in 0 ..< 20:
    # Every iteration produces a pair of subkeys from two h() outputs.
    let
      first = h(uint32(2 * i * RHO), evenWords, kw)
      second = ROL(h(uint32(2 * i * RHO + RHO), oddWords, kw), 8)
    ctx.K[2 * i] = first + second
    ctx.K[2 * i + 1] = ROL(uint32(first + 2'u32 * second), 9)

  fullKey(sboxKey, int32(k), ctx.S)

template sizeKey*(ctx: TwofishContext): int =
  ## Key size of `ctx` in bytes: its `bits` parameter divided by 8.
  (ctx.bits div 8)

template sizeBlock*(ctx: TwofishContext): int =
  ## Block size in bytes; 16 for every key size.
  16

template sizeKey*(r: typedesc[twofish]): int =
  ## Key size in bytes of the given context type: 16 for `twofish128`,
  ## 24 for `twofish192`, 32 for `twofish256`.
  when r is twofish128:
    16
  elif r is twofish192:
    24
  elif r is twofish256:
    32

template sizeBlock*(r: typedesc[twofish]): int =
  ## Block size in bytes of the given context type; always 16.
  16

proc init*(ctx: var TwofishContext, key: openArray[byte]) {.inline.} =
  ## Initializes `ctx` from `key`.
  ##
  ## `key` must hold at least `ctx.sizeKey()` bytes; any bytes beyond that
  ## are not read. A shorter key fails the assertion below instead of
  ## relying on bounds checks inside the key schedule, which are absent
  ## when checks are compiled out.
  doAssert(len(key) >= int(ctx.sizeKey()),
           "Twofish key is shorter than the key size of the context")
  initTwofishContext(ctx, ctx.bits, key)

proc init*(ctx: var TwofishContext, key: ptr byte, nkey: int = 0) {.inline.} =
  ## Initializes `ctx` from the `ctx.sizeKey()` bytes starting at `key`.
  ##
  ## `nkey` is accepted but not used: the number of bytes read is
  ## determined by the context type alone.
  let
    keyBytes = cast[ptr UncheckedArray[byte]](key)
    lastIdx = int(ctx.sizeKey()) - 1
  initTwofishContext(ctx, ctx.bits, toOpenArray(keyBytes, 0, lastIdx))

proc clear*(ctx: var TwofishContext) {.inline.} =
  ## Passes the whole context (subkeys and lookup tables) to `burnMem`.
  ctx.burnMem()

proc encrypt*(ctx: var TwofishContext, input: openArray[byte],
              output: var openArray[byte]) {.inline.} =
  ## Encrypts the block in `input[0..15]` into `output[0..15]` using the
  ## key material in `ctx`.
  ctx.twofishEncrypt(input, output)

proc decrypt*(ctx: var TwofishContext, input: openArray[byte],
              output: var openArray[byte]) {.inline.} =
  ## Decrypts the block in `input[0..15]` into `output[0..15]` using the
  ## key material in `ctx`.
  ctx.twofishDecrypt(input, output)

proc encrypt*(ctx: var TwofishContext, inbytes: ptr byte,
              outbytes: ptr byte) {.inline.} =
  ## Encrypts one block of `ctx.sizeBlock()` bytes read from `inbytes`
  ## and writes the result to `outbytes`. Both pointers are viewed as
  ## unchecked byte arrays, so no length information is available here.
  let lastIdx = ctx.sizeBlock() - 1
  var
    src = cast[ptr UncheckedArray[byte]](inbytes)
    dst = cast[ptr UncheckedArray[byte]](outbytes)
  twofishEncrypt(ctx, toOpenArray(src, 0, lastIdx),
                 toOpenArray(dst, 0, lastIdx))

proc decrypt*(ctx: var TwofishContext, inbytes: ptr byte,
              outbytes: ptr byte) {.inline.} =
  ## Decrypts one block of `ctx.sizeBlock()` bytes read from `inbytes`
  ## and writes the result to `outbytes`. Both pointers are viewed as
  ## unchecked byte arrays, so no length information is available here.
  let lastIdx = ctx.sizeBlock() - 1
  var
    src = cast[ptr UncheckedArray[byte]](inbytes)
    dst = cast[ptr UncheckedArray[byte]](outbytes)
  twofishDecrypt(ctx, toOpenArray(src, 0, lastIdx),
                 toOpenArray(dst, 0, lastIdx))
